#https://datatables.net/reference/option/
options(DT.options = list(scrollX = TRUE, pagin=TRUE, fixedHeader = TRUE, searchHighlight = TRUE))| Name | train |
| Number of rows | 56375 |
| Number of columns | 31 |
| _______________________ | |
| Column type frequency: | |
| Date | 1 |
| factor | 13 |
| numeric | 17 |
| ________________________ | |
| Group variables | None |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| reservation_status_date | 0 | 1 | 2015-07-01 | 2017-09-14 | 2016-09-03 | 805 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| agent | 0 | 1 | FALSE | 301 | 9: 14077, NUL: 9223, 240: 6333, 7: 2285 |
| arrival_date_month | 0 | 1 | FALSE | 12 | Aug: 6492, Jul: 5869, May: 5297, Oct: 5208 |
| assigned_room_type | 0 | 1 | FALSE | 10 | A: 30827, D: 14290, E: 4365, F: 2096 |
| company | 0 | 1 | FALSE | 310 | NUL: 52171, 40: 628, 223: 503, 45: 150 |
| country | 0 | 1 | FALSE | 156 | PRT: 15759, GBR: 7226, FRA: 6425, ESP: 4768 |
| customer_type | 0 | 1 | FALSE | 4 | Tra: 39832, Tra: 13983, Con: 2158, Gro: 402 |
| deposit_type | 0 | 1 | FALSE | 3 | No : 56212, Ref: 93, Non: 70 |
| distribution_channel | 0 | 1 | FALSE | 5 | TA/: 43311, Dir: 9038, Cor: 3917, GDS: 108 |
| hotel | 0 | 1 | FALSE | 2 | Cit: 34665, Res: 21710 |
| market_segment | 0 | 1 | FALSE | 7 | Onl: 26836, Off: 11933, Dir: 8002, Gro: 5749 |
| meal | 0 | 1 | FALSE | 5 | BB: 43389, HB: 7108, SC: 4988, Und: 641 |
| reservation_status | 0 | 1 | FALSE | 1 | Che: 56375, Can: 0, No-: 0 |
| reserved_room_type | 0 | 1 | FALSE | 9 | A: 39242, D: 9900, E: 3484, F: 1489 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| adr | 0 | 1 | 100.01 | 49.17 | -6.38 | 67.5 | 92.4 | 125 | 510 | <U+2587><U+2586><U+2581><U+2581><U+2581> |
| adults | 0 | 1 | 1.83 | 0.51 | 0.00 | 2.0 | 2.0 | 2 | 4 | <U+2581><U+2582><U+2587><U+2581><U+2581> |
| arrival_date_day_of_month | 0 | 1 | 15.84 | 8.79 | 1.00 | 8.0 | 16.0 | 23 | 31 | <U+2587><U+2587><U+2587><U+2587><U+2586> |
| arrival_date_week_number | 0 | 1 | 27.11 | 13.91 | 1.00 | 16.0 | 28.0 | 38 | 53 | <U+2586><U+2587><U+2587><U+2587><U+2586> |
| arrival_date_year | 0 | 1 | 2016.15 | 0.70 | 2015.00 | 2016.0 | 2016.0 | 2017 | 2017 | <U+2583><U+2581><U+2587><U+2581><U+2586> |
| babies | 0 | 1 | 0.01 | 0.12 | 0.00 | 0.0 | 0.0 | 0 | 10 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| booking_changes | 0 | 1 | 0.30 | 0.74 | 0.00 | 0.0 | 0.0 | 0 | 21 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| children | 0 | 1 | 0.10 | 0.39 | 0.00 | 0.0 | 0.0 | 0 | 3 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| days_in_waiting_list | 0 | 1 | 1.65 | 15.12 | 0.00 | 0.0 | 0.0 | 0 | 379 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| is_repeated_guest | 0 | 1 | 0.04 | 0.20 | 0.00 | 0.0 | 0.0 | 0 | 1 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| lead_time | 0 | 1 | 80.23 | 91.28 | 0.00 | 9.0 | 45.0 | 125 | 709 | <U+2587><U+2582><U+2581><U+2581><U+2581> |
| previous_bookings_not_canceled | 0 | 1 | 0.20 | 1.82 | 0.00 | 0.0 | 0.0 | 0 | 72 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| previous_cancellations | 0 | 1 | 0.02 | 0.28 | 0.00 | 0.0 | 0.0 | 0 | 13 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| required_car_parking_spaces | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.0 | 0.0 | 0 | 8 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| stays_in_week_nights | 0 | 1 | 2.46 | 1.92 | 0.00 | 1.0 | 2.0 | 3 | 42 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| stays_in_weekend_nights | 0 | 1 | 0.93 | 0.99 | 0.00 | 0.0 | 1.0 | 2 | 18 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| total_of_special_requests | 0 | 1 | 0.72 | 0.84 | 0.00 | 0.0 | 1.0 | 1 | 5 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
# make arrival date var
train = train %>% mutate(
arrival.date = make_date(
year = arrival_date_year,
month = match(arrival_date_month, month.name),
day = arrival_date_day_of_month)
)
# these numeric vars s/b factor vars
train = train %>% mutate_at(vars(arrival_date_day_of_month, arrival_date_week_number, arrival_date_year, is_repeated_guest), factor)
# reordering df
train = train %>% select(sort(tidyselect::peek_vars())) %>%
select(
where(is.Date),
where(is.factor),
where(is.numeric)
)paste(
'The date range of this dataset is from',
train %>% pull(arrival.date) %>% range %>% .[1],
'to',
train %>% pull(arrival.date) %>% range %>% .[2],
', just over 3 years of data.'
)## [1] "The date range of this dataset is from 2015-07-01 to 2017-08-31 , just over 3 years of data."
train %>% group_by(arrival.date, hotel) %>%
summarise(total.bookings = sum(adults, children)) %>%
arrange(arrival.date) %>%
plot_ly(
x = ~arrival.date,
y = ~total.bookings,
color = ~hotel,
alphtrain = 0.7
) %>% layout(
title = 'total.bookings by date/hotel',
xaxis = list(title = ''),
yaxis = list(title = '')
) train %>% group_by(arrival.date, customer_type) %>%
summarise(total.bookings = sum(adults, children)) %>%
arrange(arrival.date) %>%
plot_ly(
x = ~arrival.date,
y = ~total.bookings,
color = ~customer_type,
alphtrain = 0.7
) %>% layout(
title = 'total.bookings by date/customer_type',
xaxis = list(title = ''),
yaxis = list(title = '')
)train %>% group_by(arrival.date, deposit_type) %>%
summarise(total.bookings = sum(adults, children)) %>%
arrange(arrival.date) %>%
plot_ly(
x = ~arrival.date,
y = ~total.bookings,
color = ~deposit_type,
alphtrain = 0.7
) %>% layout(
title = 'total.bookings by date/deposit_type',
xaxis = list(title = ''),
yaxis = list(title = '')
)train %>% group_by(arrival.date, distribution_channel) %>%
summarise(total.bookings = sum(adults, children)) %>%
arrange(arrival.date) %>%
plot_ly(
x = ~arrival.date,
y = ~total.bookings,
color = ~distribution_channel,
alphtrain = 0.7
) %>% layout(
title = 'total.bookings by date/distribution_channel',
xaxis = list(title = ''),
yaxis = list(title = '')
)## Rows: 56,375
## Columns: 17
## $ agent <fct> NULL, NULL, 240, 240, NULL, 303, 241, 241...
## $ arrival_date_day_of_month <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ arrival_date_month <fct> July, July, July, July, July, July, July,...
## $ arrival_date_week_number <fct> 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 2...
## $ arrival_date_year <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015,...
## $ assigned_room_type <fct> C, C, A, A, C, C, G, E, E, G, F, A, A, D,...
## $ company <fct> NULL, NULL, NULL, NULL, NULL, NULL, NULL,...
## $ country <fct> PRT, GBR, GBR, GBR, PRT, PRT, ESP, PRT, P...
## $ customer_type <fct> Transient, Transient, Transient, Transien...
## $ deposit_type <fct> No Deposit, No Deposit, No Deposit, No De...
## $ distribution_channel <fct> Direct, Direct, TA/TO, TA/TO, Direct, Dir...
## $ hotel <fct> Resort Hotel, Resort Hotel, Resort Hotel,...
## $ is_repeated_guest <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ market_segment <fct> Direct, Direct, Online TA, Online TA, Dir...
## $ meal <fct> BB, BB, BB, BB, BB, FB, HB, BB, BB, BB, B...
## $ reservation_status <fct> Check-Out, Check-Out, Check-Out, Check-Ou...
## $ reserved_room_type <fct> C, A, A, A, C, C, G, E, E, G, F, A, A, D,...
jpal = colorRampPalette(brewer.pal(8,'Dark2'))(15)
train %>% select(where(is.factor)) %>%
map(n_unique) %>%
as.tibble() %>%
pivot_longer(everything()) %>%
plot_ly(y = ~name, x = ~value, color = ~name, colors = jpal) %>%
add_bars() %>%
hide_legend() %>%
layout(
title = 'distribution of level counts per factor',
xaxis = list(title = ''),
yaxis = list(title = '')
)## $agent
## [1] NULL 240 303 241 8 250 115 134 156 243 242 5 40 105 147
## [16] 306 184 175 96 2 127 95 6 244 15 167 300 305 196 152
## [31] 142 171 177 36 67 104 261 149 26 258 71 146 181 88 143
## [46] 251 275 69 256 314 110 126 281 208 253 330 328 326 334 185
## [61] 321 324 313 38 155 68 308 332 387 298 273 248 315 75 307
## [76] 436 201 183 223 94 3 16 468 446 327 34 78 385 339 139
## [91] 9 270 47 128 154 114 66 29 301 245 193 1 182 336 135
## [106] 348 350 195 352 355 159 10 384 363 360 375 331 367 91 64
## [121] 393 168 406 249 405 414 333 11 427 431 429 426 430 438 418
## [136] 441 282 432 72 450 434 454 455 59 368 254 57 180 358 464
## [151] 411 481 165 467 510 531 440 337 526 493 420 502 527 469 410
## [166] 479 508 163 535 302 13 7 27 22 17 28 14 42 20 19
## [181] 45 37 61 39 21 24 50 30 52 12 44 31 83 32 63
## [196] 56 89 86 79 132 4 70 82 81 74 92 99 85 87 112
## [211] 106 98 111 119 148 151 138 103 121 118 211 153 210 187 129
## [226] 213 174 220 173 216 232 35 23 58 205 157 133 150 214 290
## [241] 192 191 267 215 252 247 219 280 285 289 269 295 265 335 288
## [256] 122 325 234 341 310 344 77 346 359 283 364 370 33 371 25
## [271] 53 227 141 378 391 397 416 404 73 354 444 296 425 461 394
## [286] 390 388 453 262 459 474 229 475 480 423 484 495 509 449
## 334 Levels: 1 10 103 104 105 106 107 11 110 111 112 114 115 117 118 119 ... NULL
##
## $arrival_date_day_of_month
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31
## 31 Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 ... 31
##
## $arrival_date_month
## [1] July August September October November December January
## [8] February March April May June
## 12 Levels: April August December February January July June March ... September
##
## $arrival_date_week_number
## [1] 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
## [26] 52 53 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [51] 24 25 26
## 53 Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 ... 53
##
## $arrival_date_year
## [1] 2015 2016 2017
## Levels: 2015 2016 2017
##
## $assigned_room_type
## [1] C A G E F D I B H K
## Levels: A B C D E F G H I K L P
##
## $company
## [1] NULL 110 113 270 240 154 144 307 268 59 312 318 274 174 195
## [16] 223 317 118 281 47 324 342 371 286 88 331 178 405 367 337
## [31] 20 94 53 528 62 120 42 82 81 116 530 103 112 135 9
## [46] 39 204 16 92 31 61 356 12 457 501 86 165 291 292 290
## [61] 192 108 51 43 34 224 388 269 465 287 297 490 482 207 169
## [76] 282 437 263 225 329 28 200 338 72 319 146 159 380 323 407
## [91] 421 325 80 403 399 14 84 343 346 347 349 289 351 355 353
## [106] 99 250 358 361 390 362 366 365 277 109 377 379 22 378 83
## [121] 364 360 401 232 511 384 167 212 514 391 400 376 392 396 302
## [136] 370 397 369 409 251 168 104 382 408 413 148 10 333 419 415
## [151] 424 425 423 428 422 395 435 439 445 448 443 454 444 394 52
## [166] 459 458 456 460 447 470 255 466 484 184 485 32 487 491 494
## [181] 193 516 499 308 29 78 254 504 130 520 507 498 515 512 126
## [196] 64 330 242 477 518 521 523 539 534 436 525 541 40 455 45
## [211] 38 49 67 68 8 46 76 96 115 105 93 101 11 91 137
## [226] 139 142 127 107 140 143 149 150 163 85 180 238 219 179 221
## [241] 183 153 197 203 185 217 209 215 213 186 237 216 230 234 227
## [256] 246 245 218 158 258 259 260 411 272 257 271 18 275 210 273
## [271] 278 71 284 301 233 305 293 264 311 304 313 288 320 334 314
## [286] 332 341 352 350 372 73 383 368 393 220 412 420 410 426 417
## [301] 243 429 433 446 450 418 280 357 479 478 483 489 486 481 497
## [316] 451 492
## 353 Levels: 10 100 101 102 103 104 105 106 107 108 109 11 110 112 113 ... NULL
##
## $country
## [1] PRT GBR ESP IRL FRA NULL ROU NOR ARG USA POL DEU CHE CN GRC
## [16] ITA DNK RUS SWE EST CZE FIN AUS MOZ BWA NLD ALB BRA BEL IND
## [31] CHN MAR SVN UKR SMR LVA SRB CHL AUT BLR LTU OMN TUR MEX ISR
## [46] LUX ZMB CPV ZWE DZA KOR CRI HUN HRV CAF CYP NZL IDN THA DOM
## [61] NGA GIB ARM LKA CUB COL ZAF CMR IRN BIH MUS COM JAM UGA BGR
## [76] CIV JOR PRI JPN MYS AGO URY SAU SGP KWT LBN AND VNM PLW ARE
## [91] QAT EGY SVK SUR MDV MLT MWI ISL UZB ECU NPL KAZ BHS PAK MAC
## [106] TGO TWN HKG DJI PHL GEO TUN STP VEN AZE SEN PER KNA ETH IRQ
## [121] RWA MMR TMP BFA KEN MCO LBY PAN GNB TZA BGD BHR NAM BOL SYC
## [136] BRB AIA SLV PYF LIE ATA GAB MKD MNE GTM GHA ASM PRY SYR ABW
## [151] NCL ATF SLE LAO FRO
## 178 Levels: ABW AGO AIA ALB AND ARE ARG ARM ASM ATA ATF AUS AUT AZE BDI ... ZWE
##
## $customer_type
## [1] Transient Contract Transient-Party Group
## Levels: Contract Group Transient Transient-Party
##
## $deposit_type
## [1] No Deposit Refundable Non Refund
## Levels: No Deposit Non Refund Refundable
##
## $distribution_channel
## [1] Direct TA/TO Corporate Undefined GDS
## Levels: Corporate Direct GDS TA/TO Undefined
##
## $hotel
## [1] Resort Hotel City Hotel
## Levels: City Hotel Resort Hotel
##
## $is_repeated_guest
## [1] 0 1
## Levels: 0 1
##
## $market_segment
## [1] Direct Online TA Offline TA/TO Corporate Complementary
## [6] Groups Aviation
## 8 Levels: Aviation Complementary Corporate Direct Groups ... Undefined
##
## $meal
## [1] BB FB HB SC Undefined
## Levels: BB FB HB SC Undefined
##
## $reservation_status
## [1] Check-Out
## Levels: Canceled Check-Out No-Show
##
## $reserved_room_type
## [1] C A G E D F H L B
## Levels: A B C D E F G H L P
## Rows: 56,375
## Columns: 13
## $ adr <dbl> 0.00, 0.00, 75.00, 98.00, 98.00, 107...
## $ adults <dbl> 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...
## $ babies <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ booking_changes <dbl> 3, 4, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...
## $ children <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...
## $ days_in_waiting_list <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ lead_time <dbl> 342, 737, 7, 14, 14, 0, 9, 18, 37, 6...
## $ previous_bookings_not_canceled <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ previous_cancellations <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ required_car_parking_spaces <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ stays_in_week_nights <dbl> 0, 0, 1, 2, 2, 2, 2, 4, 4, 4, 4, 1, ...
## $ stays_in_weekend_nights <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ total_of_special_requests <dbl> 0, 0, 0, 1, 1, 0, 1, 1, 0, 3, 0, 0, ...
jquantiles = function(col){quantile(col, probs = c(0.90, 0.95, 0.99, 1))}
train %>% na.omit %>% select(where(is.numeric)) %>%
map(.x = . , jquantiles) %>%
as.data.frame.list() %>%
rownames_to_column() %>%
as.tibble()#with outliers
train %>% select(where(is.numeric)) %>% DataExplorer::plot_histogram(nrow = 2, ncol = 1)#no extreme right tailed outliers
train %>% select(where(is.numeric)) %>% filter(
adr != 510,
adults != 55,
babies != 10,
booking_changes != 21,
children != 10,
days_in_waiting_list != 391,
lead_time != 709,
previous_bookings_not_canceled != 72,
previous_cancellations != 26,
required_car_parking_spaces != 8,
stays_in_week_nights != 50,
stays_in_weekend_nights != 19
) %>% DataExplorer::plot_histogram(nrow = 2, ncol = 1)#no outliers
train %>% select(hotel, where(is.numeric)) %>% filter(
adr != 510,
adults != 55,
babies != 10,
booking_changes != 21,
children != 10,
days_in_waiting_list != 391,
lead_time != 709,
previous_bookings_not_canceled != 72,
previous_cancellations != 26,
required_car_parking_spaces != 8,
stays_in_week_nights != 50,
stays_in_weekend_nights != 19
) %>% DataExplorer::plot_boxplot(by = 'hotel', nrow = 3, ncol = 1)#check: check number of bookings by combination of adult and children counts
train %>% count(adults, children)train %>% count(adults, children) %>% filter(adults == 0) %>% summarise(total.bookings.without.adults = sum(n))train %>% count(adults, children) %>% filter(children == 0) %>% summarise(total.bookings.without.children = sum(n))train %>% count(adults, children) %>% filter(children == 0, adults == 0) %>% summarise(total.bookings.without.adults.and.children = sum(n))paste(
scales::percent(
nrow(train %>% filter(adults > 0, children == 0)) / nrow(train)
),
'of all observations have 1 or more adults and zero children.'
)## [1] "93% of all observations have 1 or more adults and zero children."
#creating adr per adult var
train.adult.adrs = train.adult.adrs %>% mutate(adr.per.adult = adr/adults)
train.adult.adrs %>%
plot_ly(y = ~hotel, x = ~adr.per.adult, color = ~hotel, colors = jpal[1:2]) %>%
add_boxplot() %>%
layout(
title = 'ADR per Adult by Hotel Type',
xaxis = list(title = ''),
yaxis = list(title = '')
)#https://stackoverflow.com/questions/57300053/split-a-plotly-boxplot-x-axis-by-group
train.adult.adrs %>%
plot_ly(y = ~hotel, x = ~adr.per.adult, color = ~customer_type, colors = jpal, group = ~customer_type) %>%
add_boxplot() %>%
layout(
boxmode = 'group', #SUPER IMPORTANT
title = 'ADR per Adult by Hotel/customer_type'
) #https://stackoverflow.com/questions/57300053/split-a-plotly-boxplot-x-axis-by-group
train.adult.adrs %>%
plot_ly(y = ~hotel, x = ~adr, color = ~market_segment, colors = jpal, group = ~market_segment) %>%
add_boxplot() %>%
layout(
boxmode = 'group', #SUPER IMPORTANT
title = 'ADR per Adult by Hotel/market_segment'
) #https://stackoverflow.com/questions/57300053/split-a-plotly-boxplot-x-axis-by-group
train.adult.adrs %>%
plot_ly(x = ~hotel, y = ~adr, color = ~arrival_date_month, colors = jpal, group = ~arrival_date_month) %>%
add_boxplot() %>%
layout(
boxmode = 'group', #SUPER IMPORTANT
title = 'ADR per Adult by Hotel/arrival_date_month',
hoverformat = '.0f'
) ##
## Attaching package: 'DescTools'
## The following object is masked from 'package:data.table':
##
## %like%
library(anomalize)
# time_decompose(data, target, method = c("stl", "twitter"), frequency = "auto", trend = "auto", ..., merge = FALSE, message = TRUE)
# anomalize(data, target, method = c("iqr", "gesd"), alpha = 0.05, max_anoms = 0.2, verbose = FALSE)
# The alpha parameter adjusts the width of the critical values. By default, alpha = 0.05.
# Lower values are more conservative while higher values are less prone to incorrectly classifying "normal" observations.
# max_anoms: The maximum percent of anomalies permitted to be identified.
# The STL method uses the stl() function from the stats package. STL works very well in circumstances where a long term trend is present (which applies in this case; see trend component in the prophet graphs below').
#use full data set, filter to hotel type, arrange by date
a1 = a %>% mutate(
arrival.date = make_date(
year = arrival_date_year,
month = match(arrival_date_month, month.name),
day = arrival_date_day_of_month)
)
(anomaly.hotel.resort = a1 %>% filter(hotel == 'Resort Hotel') %>%
group_by(arrival.date, hotel) %>%
summarise(total.bookings = sum(adults, children)) %>%
select(arrival.date, hotel, total.bookings) %>%
arrange(arrival.date) %>% as.tibble() %>%
time_decompose(total.bookings, method = 'stl', merge = TRUE) %>%
anomalize(remainder, alpha = 0.15, method = 'gesd') %>% #increasing sensitivity to outliers
time_recompose())(anomaly.hotel.city = a1 %>% filter(hotel == 'City Hotel') %>%
group_by(arrival.date, hotel) %>%
summarise(total.bookings = sum(adults, children)) %>%
select(arrival.date, hotel, total.bookings) %>%
arrange(arrival.date) %>% as.tibble() %>%
time_decompose(total.bookings, method = 'stl', merge = TRUE) %>%
anomalize(remainder, alpha = 0.15, method = 'gesd') %>% #increasing sensitivity to outliers
time_recompose())library(prophet)
library(dygraphs)
# https://dygraphs.com/options.html
#renaming cols to prophet's col conventions
prophet.resort.df = anomaly.hotel.resort %>% select(ds = arrival.date, y = total.bookings)
#creating model
prophet.resort.mdl = prophet.resort.df %>% prophet()
#using model make future period df
prophet.resort.future.df = prophet.resort.mdl %>% make_future_dataframe(
periods = 28, #next 4 wks
freq = 'day',
include_history = TRUE
)
#make forecasts df
prophet.resort.forecast.df = prophet.resort.mdl %>% predict(prophet.resort.future.df)
prophet.resort.forecast.df %>% head %>% DT::datatable()#plot forecast
prophet.resort.mdl %>% dyplot.prophet(
prophet.resort.forecast.df,
main = '<h style="color: black; font-size:18px;">Resort Hotel: Total Bookings 1 Month Prediction</h>'
) %>%
dygraphs::dyOptions(
colors = c('darkorange','blue'),
pointSize = 2,
)#plot forecast components
prophet.resort.mdl %>% prophet_plot_components(prophet.resort.forecast.df)library(prophet)
# https://dygraphs.com/options.html
#renaming cols to prophet's col conventions
prophet.city.df = anomaly.hotel.city %>% select(ds = arrival.date, y = total.bookings)
#creating model
prophet.city.mdl = prophet.city.df %>% prophet()## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
#using model make future period df
prophet.city.future.df = prophet.city.mdl %>% make_future_dataframe(
periods = 28, #next 4 wks
freq = 'day',
include_history = TRUE
)
#make forecasts df
prophet.city.forecast.df = prophet.city.mdl %>% predict(prophet.city.future.df)
prophet.city.forecast.df %>% head %>% DT::datatable()#plot forecast
prophet.city.mdl %>% dyplot.prophet(
prophet.city.forecast.df,
main = '<h style="color: black; font-size:18px;">City Hotel: Total Bookings 1 Month Prediction</h>'
) %>%
dygraphs::dyOptions(
colors = c('darkgreen','blue'),
pointSize = 2,
)